Marcelo Ponce, Amit Sandhel (2020). covid19.analytics: An R Package to Obtain, Analyze and Visualize Data from the Corona Virus Disease Pandemic. URL https://arxiv.org/abs/2009.01091
predictionDays <- 90
country <- "Canada"
population <- 37590000
noCache <- TRUE
## [1] "Data dated from: 2021-03-22"
Looking at if new cases can predict hospitalisation 13 days in future.
# 13 jours Test
casCovidParJour <- confirmes_quebec
casCovidParJour <- casCovidParJour[order(casCovidParJour$Date), ]
casCovidParJour$Date <- as.Date(casCovidParJour$Date)
hospTreizeJours <- hosp_quebec
merg <- inner_join(casCovidParJour, hospTreizeJours, by = c("Date"))
merg <- merg[order(merg$Date), ]
# Remove start of pandemy irregular
merg <- merg[-1:-144, ]
newCases <- head(merg$Nb_Nvx_Cas, -13)
icu <- tail(merg$ACT_Si_RSS99, -13)
hosp <- tail(merg$ACT_Hsi_RSS99, -13)
corrNewCasesIcu <- data.frame(newCases, icu)
corrNewCasesHosp <- data.frame(newCases, hosp)
ratioIcu <- mean(newCases / icu)
ratioHosp <- mean(newCases / hosp)
# Mean ratio new cases / hosp + 13 days
# ratioHosp <- 1.97
# ratioIcu <- 10.6
m.icu <- lm(newCases ~ icu, data = corrNewCasesIcu)
coeffIcu <- m.icu$coefficients[2]
interceptIcu <- m.icu$coefficients[1]
m.hosp <- lm(newCases ~ hosp, data = corrNewCasesHosp)
coeffHosp <- m.hosp$coefficients[2]
interceptHosp <- m.hosp$coefficients[1]
merg$Date <- as.Date(merg$Date) + 13
last13Days <- tail(merg, 13)
last13Days$Pred_ACT_Hsi_RSS99 <- as.integer(interceptHosp + (last13Days$Nb_Nvx_Cas/coeffHosp))
last13Days$Pred_ACT_Si_RSS99 <- as.integer(interceptIcu + (last13Days$Nb_Nvx_Cas/coeffIcu))
last13Days <- last13Days %>% select(Date, Nb_Nvx_Cas, ACT_Hsi_RSS99, ACT_Si_RSS99, Pred_ACT_Hsi_RSS99, Pred_ACT_Si_RSS99)
last13Days$ACT_Hsi_RSS99 <- rev(last13Days$ACT_Hsi_RSS99)
last13Days$ACT_Si_RSS99 <- rev(last13Days$ACT_Si_RSS99)
Test de corrélation entre les nouveaux cas et le nombre de patients en soins intensifs 13 jours plus tard
plot(newCases ~ icu, ylab = "Nouveaux cas", xlab = "Soins Internes 13 jours + tard")
plot(rstudent(m.icu) ~ fitted(m.icu),
ylab = "Résidus de Student",
xlab = "Valeurs prédites",
main = "Homogénéité des variances"
)
qqnorm(rstudent(m.icu),
ylab = "Quantiles observés",
xlab = "Quantiles théoriques", main = "Normalité des résidus"
)
qqline(rstudent(m.icu))
summary(m.hosp)
##
## Call:
## lm(formula = newCases ~ hosp, data = corrNewCasesHosp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -509.6 -112.6 -29.5 102.9 470.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.73323 31.03299 0.636 0.526
## hosp 1.91144 0.04414 43.304 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 188.9 on 187 degrees of freedom
## Multiple R-squared: 0.9093, Adjusted R-squared: 0.9088
## F-statistic: 1875 on 1 and 187 DF, p-value: < 0.00000000000000022
summary(m.icu)
##
## Call:
## lm(formula = newCases ~ icu, data = corrNewCasesIcu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -593.31 -195.06 23.01 186.76 612.41
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -70.3878 49.0414 -1.435 0.153
## icu 10.9402 0.3799 28.801 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 269 on 187 degrees of freedom
## Multiple R-squared: 0.816, Adjusted R-squared: 0.815
## F-statistic: 829.5 on 1 and 187 DF, p-value: < 0.00000000000000022
cor.test(corrNewCasesIcu$newCases, corrNewCasesIcu$icu)
##
## Pearson's product-moment correlation
##
## data: corrNewCasesIcu$newCases and corrNewCasesIcu$icu
## t = 28.801, df = 187, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8732007 0.9266040
## sample estimates:
## cor
## 0.9033451
Test de corrélation entre les nouveaux cas et le nombre de patients hospitalisés 13 jours plus tard
plot(newCases ~ hosp, ylab = "Nouveaux cas", xlab = "Hospitalisations 13 jours + tard")
Test de corrélation entre les nouveaux cas et le nombre de patients hospitalisés 13 jours plus tard
cor.test(newCases, hosp)
##
## Pearson's product-moment correlation
##
## data: newCases and hosp
## t = 43.304, df = 187, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9386028 0.9649761
## sample estimates:
## cor
## 0.9535842
last13Days$ACT_Hsi_RSS99 <- rev(last13Days$ACT_Hsi_RSS99)
last13Days$ACT_Si_RSS99 <- rev(last13Days$ACT_Si_RSS99)
plot(last13Days$Date, last13Days$Pred_ACT_Hsi_RSS99, type = "l", col = "red", xlab = "Date", ylab = "Hospitalisations", main = "Prédiction Hospitalisations (13 Jours + Tard)", ylim = c(0, 1500))
lines(last13Days$Date, last13Days$ACT_Hsi_RSS99, col = "blue")
legend(c("bottomleft"), 0, legend = c("Pred + 13 jours", "- 13 jours"), col = c("red", "blue"), lty = 1:1, cex = 0.8)
text(last13Days$Date, last13Days$Pred_ACT_Hsi_RSS99 + 25, labels = as.character(last13Days$Pred_ACT_Hsi_RSS99))
text(median(last13Days$Date), 1200, paste("Moyenne +13 jours: ", as.integer(mean(last13Days$Pred_ACT_Hsi_RSS99))))
text(median(last13Days$Date), 1000, paste("Moyenne -13 jours: ", as.integer(mean(last13Days$ACT_Hsi_RSS99))))
plot(last13Days$Date, last13Days$Pred_ACT_Si_RSS99, type = "l", col = "red", xlab = "Date", ylab = "Soins Intensifs", main = "Prédiction Soins Intensifs (13 Jours + Tard)", ylim = c(0, 300))
lines(last13Days$Date, last13Days$ACT_Si_RSS99, col = "blue")
legend(c("bottomleft"), 0, legend = c("Pred + 13 jours", "- 13 jours"), col = c("red", "blue"), lty = 1:1, cex = 0.8)
text(last13Days$Date, last13Days$Pred_ACT_Si_RSS99 + 25, labels = as.character(last13Days$Pred_ACT_Si_RSS99))
text(median(last13Days$Date), 250, paste("Moyenne +13 jours: ", as.integer(mean(last13Days$Pred_ACT_Si_RSS99))))
text(median(last13Days$Date), 200, paste("Moyenne -13 jours: ", as.integer(mean(last13Days$ACT_Si_RSS99))))
plot(hosp_quebec$ACT_Si_RSS99, ylim = c(0, max(hosp_quebec$ACT_Si_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations Soins Intensifs", xlab = "Jours", main = "Quebec: Actuel - Soins Intensifs")
previsions(hosp_quebec$Date, hosp_quebec$ACT_Si_RSS99, "Québec - Prévision Soins Intensifs ", "Date", "Patients", 90)
plot(hosp_quebec$ACT_Hsi_RSS99, ylim = c(0, max(hosp_quebec$ACT_Hsi_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations", xlab = "Jours", main = "Quebec: Actuel - Hospitalisations")
previsions(hosp_quebec$Date, hosp_quebec$ACT_Hsi_RSS99, "Québec - Prévision Hospitalisations ", "Date", "Patients", 90)
plot(confirmes_quebec$Nb_Nvx_Cas, ylim = c(0, max(confirmes_quebec$Nb_Nvx_Cas)), xlim = c(0, nrow(confirmes_quebec)), ylab = "Cas", xlab = "Jours", main = "Quebec : Actuel - Cas")
previsions(confirmes_quebec$Date, confirmes_quebec$Nb_Nvx_Cas, "Québec - Prévision - Cas ", "Date", "Cas", 90)
plot(deces_quebec$total_jour, ylim = c(0, max(deces_quebec$total_jour)), xlim = c(0, nrow(deces_quebec)), ylab = "Décès", xlab = "Jours", main = "Quebec : Actuel - Décès")
previsions(deces_quebec$Date, deces_quebec$total_jour, "Québec - Prévision - Décès ", "Date", "Décès", 90)
plot(canada$total_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, 450), ylim = c(0, 30000), main = paste(country, " - Total Morts"))
plot(canada$total_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, 450), , ylim = c(0, 1000000), main = paste(country, " - Total Cas"))
plot(canada$new_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, 450), ylim = c(0, 300), main = paste(country, " - Morts / Jour"))
plot(canada$new_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, 450), main = paste(country, " - Nouveaux Cas / Jour"))
plot(canada$total_cases_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, 450), , main = "Total Cases /1 Million Population", col = "green", ylim = c(0, 100000), cex.axis = 0.75)
points(usa$total_cases_per_million, col = "red")
legend(1, 40000, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$total_deaths_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, 450), main = "Total Deaths /1 Million Population", col = "green", ylim = c(0, 1600), cex.axis = 0.75)
points(usa$total_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$new_cases_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, 450), main = "New Cases/1 Million Population per Day", col = "green", ylim = c(0, 1100), cex.axis = 0.75)
points(usa$new_cases_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$new_deaths_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, 450), main = "New Deaths/1 Million Population per Day", col = "green", ylim = c(0, 15), cex.axis = 0.75)
points(usa$new_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
tsCan <- ts(canada$new_deaths_per_million, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan), h = 90), sub = "Forecast 90 jours")
tsCan2 <- ts(canada$new_deaths, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan2), h = 90), sub = "Forecast 90 jours", ylab = "Nb de morts par jour", xlab = "Jours")
tsc_us <- tsc %>% filter(Country.Region == country)
tsc_us <- data.frame(t(tsc_us))
tsc_us <- cbind(rownames(tsc_us), data.frame(tsc_us, row.names = NULL))
colnames(tsc_us) <- c("Date", "Confirmed")
tsc_us <- tsc_us[-c(1:4), ]
tsc_us$Date <- ymd(tsc_us$Date)
tsc_us$Confirmed <- as.numeric(tsc_us$Confirmed)
str(tsc_us)
tsd_us <- tsd %>% filter(Country.Region == country)
tsd_us <- data.frame(t(tsd_us))
tsd_us <- cbind(rownames(tsd_us), data.frame(tsd_us, row.names = NULL))
colnames(tsd_us) <- c("Date", "Confirmed")
tsd_us <- tsd_us[-c(1:4), ]
tsd_us$Date <- ymd(tsd_us$Date)
tsd_us$Confirmed <- as.numeric(tsd_us$Confirmed)
str(tsd_us)
qplot(Date, Confirmed, data = tsc_us, main = paste("Covid19 - Number of confirmed Cases in ", country))
qplot(Date, Confirmed, data = tsd_us, main = paste("Covid19 - Number of confirmed deaths in ", country))
ds <- tsc_us$Date
y <- tsc_us$Confirmed
df <- data.frame(ds, y)
colnames(df) <- c("ds", "y")
ds2 <- tsd_us$Date
y2 <- tsd_us$Confirmed
df2 <- data.frame(ds2, y2)
colnames(df2) <- c("ds", "y")
tsd_us$delta <- c(NA, diff(tsd_us$Confirmed, lag = 1))
tsd_us$month <- format(as.Date(tsd_us$Date), "%m")
tsd_us$week <- week(as.Date(tsd_us$Date))
tsd_us$delta[is.na(tsd_us$delta)] <- 0
dfChunked <- df[-(1:100), , drop = FALSE]
m <- prophet(dfChunked, yearly.seasonality = F, daily.seasonality = F)
future <- make_future_dataframe(m, periods = predictionDays)
forcast <- predict(m, future)
plot(m, forcast, xlabel = "Date", ylabel = "Cases") + ggtitle(paste(country, " - Predicted Covid Caseses ", predictionDays, " days"))
dfChunked2 <- df2[-(1:100), , drop = FALSE]
m2 <- prophet(dfChunked2, yearly.seasonality = F, daily.seasonality = F)
future2 <- make_future_dataframe(m2, periods = predictionDays)
forcast2 <- predict(m2, future2)
plot(m2, forcast2, xlabel = "Date", ylabel = "Deaths") + ggtitle(paste(country, " - Predicted Covid Deaths ", predictionDays, " days"))
dyplot.prophet(m, forcast,
main = paste(country, " - Predicted Covid Cases ", predictionDays, " days - FB Prophet"),
xlab = "Date", ylab = "Cases"
) %>% dyOptions(maxNumberWidth = 20)
dyplot.prophet(m2, forcast2,
main = paste(country, " - Predicted Covid Deaths ", predictionDays, " days - FB Prophet"),
xlab = "Date", ylab = "Deaths"
) %>% dyOptions(maxNumberWidth = 20)
prophet_plot_components(m, forcast)
prophet_plot_components(m2, forcast2)
l <- nrow(dfChunked)
pred <- forcast$yhat[1:l]
actual <- m$history$y
plot(actual, pred, main = paste(country, " Cases - Prediction vs Actual"))
abline(lm(pred ~ actual), col = "red")
pred2 <- forcast2$yhat[1:l]
actual2 <- m2$history$y
plot(actual2, pred2, main = paste(country, " Deaths - Prediction vs Actual"))
abline(lm(pred2 ~ actual2), col = "red")
summary(lm(pred ~ actual))
##
## Call:
## lm(formula = pred ~ actual)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1114.26 -133.81 -16.69 104.86 2726.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4529502 33.7324684 0.162 0.872
## actual 0.9998906 0.0004819 2075.106 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 427.5 on 324 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 4.306e+06 on 1 and 324 DF, p-value: < 0.00000000000000022
summary(lm(pred2 ~ actual2))
##
## Call:
## lm(formula = pred2 ~ actual2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.796 -6.911 -0.742 4.484 97.576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.070194 1.690698 0.633 0.527
## actual2 0.998326 0.001876 532.130 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.43 on 324 degrees of freedom
## Multiple R-squared: 0.9989, Adjusted R-squared: 0.9989
## F-statistic: 2.832e+05 on 1 and 324 DF, p-value: < 0.00000000000000022
rmarkdown::render(“COVID_CANADA.Rmd”)